In [8]:
import dask.array as da
Dask is a task scheduler that can run work on threads or processes, and ...
In [7]:
import numpy as np
# 25 consecutive integers, 0 through 24, held in a NumPy array.
x = np.arange(0, 25)
In [3]:
# Echo x as the cell's last expression so the notebook renders its repr.
x
Out[3]:
In [9]:
# Rebind x to a Dask array of 25 integers split into chunks of 5, then square it.
x = da.arange(25, chunks=(5,))
y = x ** 2
# A bare expression is only rendered when it is the last line of a cell, so the
# array summary is shown explicitly instead of being silently discarded.
display(y)
# Render the task graph behind y (this is the cell's displayed output).
y.visualize()
Out[9]:
In [11]:
# Task graph for the last element of sqrt(x). The call is not the final line of
# the cell, so its result must be passed to display() to appear in the output.
display(da.sqrt(x)[-1].visualize())
# Rebind x to a larger array (250 integers, chunks of 5) and render its graph.
# Note that y, defined earlier, still refers to the previous x.
x = da.arange(250, chunks=(5,))
x.visualize()
Out[11]:
In [13]:
# 15x15 array of ones stored as 5x5 chunks; x is rebound here once more.
x = da.ones(
    (15, 15),
    chunks=(5, 5),
)
# Render the task graph of the sum taken along axis 1.
x.sum(axis=1).visualize()
Out[13]:
In [14]:
import dask.multiprocessing
# Evaluate y on the multiprocessing scheduler. The scheduler is selected by name:
# the older `get=` keyword was deprecated and later removed from compute().
y.compute(scheduler='processes')
Out[14]:
In [15]:
import dask.dataframe as dd
In [16]:
# Column names handed to the CSV reader through `names=`.
cols = [
    'square_id', 'timestamp', 'country_code',
    'sms_in', 'sms_out', 'call_in', 'call_out', 'internet',
]
# Declared dtype per column. Every key must be spelled exactly like its entry in
# `cols`; a key that matches no column name does not apply to any column.
dtypes = {
    'square_id': int, 'timestamp': int, 'country_code': int,
    'sms_in': float, 'sms_out': float,
    'call_in': float, 'call_out': float, 'internet': float,
}
In [17]:
# Show the documentation for the CSV reader. help() is plain Python, so this
# line does not rely on IPython's trailing-`?` syntax and binds no variable.
help(dd.read_csv)
In [ ]:
# NOTE(review): this binds df to the dd.read_csv function itself (it is not
# called), so df is not a dataframe. The cell has no execution count; it looks
# like a leftover scratch cell -- confirm nothing later relies on df, then remove.
df = dd.read_csv
In [24]:
# Read every file matching the glob as tab-separated text into a Dask dataframe,
# using the column names in `cols` and the dtypes in `dtypes`.
df_a = dd.read_csv(
    'data/split/*.csv',
    sep="\t",
    header=0,
    names=cols,
    dtype=dtypes,
)
In [28]: